<html>
 <head>
  <meta charset="utf-8"/>
  <!-- Viewport: scales to the device width without capping zoom, so users can still pinch-zoom the text. -->
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   Spark 1.4为DataFrame新增的统计与数学函数  | 数螺 | NAUT IDEA
  </title>
  <!-- Bootstrap core is loaded first; the optional theme follows so its rules win the cascade. -->
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* ---- Article container (#xmain) ---- */

   /* Images never overflow the column and sit on their own line. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   /* Body text. */
   #xmain p {
     margin-top: 20px;
     font-size: 16px;
     line-height: 150%;
   }

   /* Heading scale inside the article. */
   #xmain h2 { font-size: 24px; }
   #xmain h3 { font-size: 20px; }
   #xmain h4 { font-size: 18px; }

   /* ---- Site banner (.header) ---- */

   .header {
     margin-bottom: 20px;
     color: #ffffff;
     background-color: #0099ff;
   }

   /* Banner text sits inline, vertically centred on its line. */
   .header p {
     display: inline-block;
     vertical-align: middle;
     margin: 0;
     padding: 10px 0;
     font-size: 16px;
   }

   .header a { color: white; }

   .header img { height: 25px; }
  </style>
  <!-- jQuery is loaded without defer/async: the inline script that follows calls $ as soon as it is parsed. -->
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <!-- Presumably supplies the Readability constructor that the inline script instantiates (confirm against the hosted file). -->
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // Once the DOM is ready, run Readability over a copy of the page and
   // replace the contents of #xmain with the extracted article markup.
   $(document).ready(function() {
     // Address of the original article, passed to Readability as its uri argument.
     var uri = {
       spec: "http://dataunion.org/19375.html",
       host: "http://dataunion.org",
       prePath: "http://dataunion.org",
       scheme: "http",
       pathBase: "http://dataunion.org/"
     };

     // Readability is given a deep clone rather than the live document.
     var documentClone = document.cloneNode(true);
     var article = new Readability(uri, documentClone).parse();

     // parse() can return null when no article is found; in that case keep
     // the original markup instead of throwing on article.content.
     if (!article || !article.content) {
       return;
     }

     document.getElementById("xmain").innerHTML = article.content;
   });
  </script>
  <!-- 1466457589: Accept with keywords: (title(0.4):Spark,社区,数学,函数,数盟, topn(0.633333333333):社区,数盟,深度学习,行业资讯,数据挖掘,变量,数据分析,交叉,Python,职业规划,例子,基础架构,相关性,计算,文章,数学,Spark,数据,函数,样本,协方差,随机,可视化,列联表,spark,汇总表,编程语言,分类,人工智能,描述性).-->
 </head>
 <body onload="">
  <!-- Site banner: logo and site name both link to /databee; the tagline column carries hidden-xs. -->
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <a href="/databee">
       <!-- alt gives this image-only link an accessible name. -->
       <img alt="数螺" src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png"/>
      </a>
      <a href="/databee">
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <div class="container text-center">
   <h1>
    Spark 1.4为DataFrame新增的统计与数学函数
   </h1>
  </div>
  <div class="container" id="xmain">
   ﻿﻿
   <title>
    Spark 1.4为DataFrame新增的统计与数学函数 | 数盟社区
   </title>
   <!-- All in One SEO Pack 2.2.7.6.2 by Michael Torbert of Semper Fi Web Design[32,76] -->
   <!-- /all in one seo pack -->
   <!--
<div align="center">
<a href="http://strata.oreilly.com.cn/hadoop-big-data-cn?cmp=mp-data-confreg-home-stcn16_dataunion_pc" target="_blank"><img src="http://dataunion.org/wp-content/uploads/2016/05/stratabj.jpg"/ ></a>
</div>
-->
   <header id="header-web">
    <div class="header-main">
     <!-- Source-site logo linking home; the alt text mirrors the link's title so the image-only link has an accessible name. -->
     <hgroup class="logo">
      <h1>
       <a href="http://dataunion.org/" rel="home" title="数盟社区">
        <img alt="数盟社区" src="http://dataunion.org/wp-content/themes/yzipi/images/logo.png"/>
       </a>
      </h1>
     </hgroup>
     <!--logo-->
     <nav class="header-nav">
      <ul class="menu" id="menu-%e4%b8%bb%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-71" id="menu-item-71">
        <a href="http://dataunion.org/category/events" title="events">
         活动
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22457" id="menu-item-22457">
          <a href="http://dataunion.org/2016timeline">
           2016档期
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22459" id="menu-item-22459">
          <a href="http://dataunion.org/category/parterc">
           合作会议
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor menu-item-has-children menu-item-20869" id="menu-item-20869">
        <a href="http://dataunion.org/category/tech" title="articles">
         文章
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor current-menu-parent current-post-parent menu-item-20867" id="menu-item-20867">
          <a href="http://dataunion.org/category/tech/base" title="base">
           基础架构
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3302" id="menu-item-3302">
          <a href="http://dataunion.org/category/tech/ai" title="ai">
           人工智能
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3303" id="menu-item-3303">
          <a href="http://dataunion.org/category/tech/analysis" title="analysis">
           数据分析
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21920" id="menu-item-21920">
          <a href="http://dataunion.org/category/tech/dm">
           数据挖掘
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3314" id="menu-item-3314">
          <a href="http://dataunion.org/category/tech/viz" title="viz">
           可视化
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3305" id="menu-item-3305">
          <a href="http://dataunion.org/category/tech/devl" title="devl">
           编程语言
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-20876" id="menu-item-20876">
        <a href="http://dataunion.org/category/industry">
         行业
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-16328" id="menu-item-16328">
          <a href="http://dataunion.org/category/industry/case" title="case">
           行业应用
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-2112" id="menu-item-2112">
          <a href="http://dataunion.org/category/industry/demo" title="demo">
           Demo展示
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21562" id="menu-item-21562">
          <a href="http://dataunion.org/category/industry/news">
           行业资讯
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-311" id="menu-item-311">
        <a href="http://dataunion.org/category/sources" title="sources">
         资源
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20870" id="menu-item-20870">
        <a href="http://dataunion.org/category/books" title="book">
         图书
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21363" id="menu-item-21363">
        <a href="http://dataunion.org/category/training">
         课程
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-21853" id="menu-item-21853">
        <a href="http://dataunion.org/category/jobs">
         职位
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22050" id="menu-item-22050">
          <a href="http://dataunion.org/category/career">
           职业规划
          </a>
         </li>
        </ul>
       </li>
      </ul>
     </nav>
     <!--header-nav-->
    </div>
   </header>
   <!--header-web-->
   <div id="main">
    <div id="soutab">
     <form action="http://dataunion.org/" class="search" method="get">
     </form>
    </div>
    <div id="container">
     <nav id="mbx">
      当前位置：
      <a href="http://dataunion.org">
       首页
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech">
       文章
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech/base">
       基础架构
      </a>
      &gt;  正文
     </nav>
     <!--mbx-->
     <article class="content">
      <!-- Post title block: linked title, view count, and category link. -->
      <header class="contenttitle">
       <div class="mscc">
        <h1 class="mscctitle">
         <a href="http://dataunion.org/19375.html">
          Spark 1.4为DataFrame新增的统计与数学函数
         </a>
        </h1>
        <address class="msccaddress ">
         <em>
          3,112 次阅读 -
         </em>
         <a href="http://dataunion.org/category/tech/base" rel="category tag">
          基础架构
         </a>
        </address>
       </div>
      </header>
      <div class="content-text">
       <p>
        出处：
        <a href="http://www.iteblog.com/archives/1382">
         过往记忆
        </a>
       </p>
       <p>
        社区在
        <!-- rel="noopener noreferrer": the tab opened by target="_blank" gets no window.opener handle. -->
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Spark" href="http://www.iteblog.com/archives/tag/spark" rel="noopener noreferrer" target="_blank" title="">
          Spark
         </a>
        </span>
        1.3中开始引入了DataFrames，使得Apache
        <!-- rel="noopener noreferrer": the tab opened by target="_blank" gets no window.opener handle. -->
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Spark" href="http://www.iteblog.com/archives/tag/spark" rel="noopener noreferrer" target="_blank" title="">
          Spark
         </a>
        </span>
        更加容易被使用。受R和
        <!-- rel="noopener noreferrer": the tab opened by target="_blank" gets no window.opener handle. -->
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Python" href="http://www.iteblog.com/archives/tag/python" rel="noopener noreferrer" target="_blank" title="">
          Python
         </a>
        </span>
        中data frames的启发，Spark中的DataFrames提供了一套API，使用起来就像在操作单机上的数据一样，而数据科学家对这类API已经非常熟悉。统计是日常数据科学工作的重要组成部分。即将发布的Spark 1.4改进了对统计函数和数学函数（statistical and mathematical functions）的支持。
       </p>
       <p>
        这篇文章中将介绍一些非常重要的函数，包括：
        <br/>
        1、随机数据生成(Random data generation)；
        <br/>
        2、总结和描述性统计(Summary and descriptive statistics)；
        <br/>
        3、样本协方差和相关性(Sample covariance and correlation)；
        <br/>
        4、交叉分类汇总表（又称列联表）(Cross tabulation)；
        <br/>
        5、频繁项(Frequent items)；
        <br/>
        6、数学函数(Mathematical functions)。
       </p>
       <p>
        下面的例子全部是使用
        <!-- rel="noopener noreferrer": the tab opened by target="_blank" gets no window.opener handle. -->
        <span class="wp_keywordlink_affiliate">
         <a data-original-title="View all posts in Python" href="http://www.iteblog.com/archives/tag/python" rel="noopener noreferrer" target="_blank" title="">
          Python
         </a>
        </span>
        语言实现，在Scala和Java中存在类似的API。
       </p>
       <h2>
        一、随机数据生成(Random data generation)
       </h2>
       <p>
        随机数据生成在测试现有的算法和实现随机算法中非常重要，比如随机投影。在
        <code>
         sql.functions
        </code>
        模块里面提供了用于生成随机列的函数，列中的值独立同分布（i.i.d.），例如
        <code>
         uniform(rand)
        </code>
        和标准的
        <code>
         normal(randn)
        </code>
        。
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9714715342261" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [1]: from pyspark.sql.functions import rand, randn
In [2]: # Create a DataFrame with one int column and 10 rows.
In [3]: df = sqlContext.range(0, 10)
In [4]: df.show()
+--+
|id|
+--+
| 0|
| 1|
| 2|
| 3|
| 4|
| 5|
| 6|
| 7|
| 8|
| 9|
+--+

In [4]: # Generate two other columns using uniform distribution and normal distribution.
In [5]: df.select("id", rand(seed=10).alias("uniform"), randn(seed=27).alias("normal")).show()
+--+-------------------+--------------------+
|id|            uniform|              normal|
+--+-------------------+--------------------+
| 0| 0.7224977951905031| -0.1875348803463305|
| 1| 0.2953174992603351|-0.26525647952450265|
| 2| 0.4536856090041318| -0.7195024130068081|
| 3| 0.9970412477032209|  0.5181478766595276|
| 4|0.19657711634539565|  0.7316273979766378|
| 5|0.48533720635534006| 0.07724879367590629|
| 6| 0.7369825278894753| -0.5462256961278941|
| 7| 0.5241113627472694| -0.2542275002421211|
| 8| 0.2977697066654349| -0.5752237580095868|
| 9| 0.5060159582230856|  1.0900096472044518|
+--+-------------------+--------------------+
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-21">
                21
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-22">
                22
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-23">
                23
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-24">
                24
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-25">
                25
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-26">
                26
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-27">
                27
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-28">
                28
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-29">
                29
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-30">
                30
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-31">
                31
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-32">
                32
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-33">
                33
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9714715342261-34">
                34
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9714715342261-35">
                35
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9714715342261-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e">
                 import
                </span>
                <span class="crayon-v">
                 rand
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 randn
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-2">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # Create a DataFrame with one int column and 10 rows.
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-3">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sqlContext
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-4">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-5">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-6">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 id
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-7">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-8">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-9">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-10">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-11">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-12">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-13">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-14">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-15">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 7
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-16">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 8
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-17">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 9
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-18">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-19">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-20">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # Generate two other columns using uniform distribution and normal distribution.
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-21">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 select
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "id"
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 rand
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 alias
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "uniform"
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 randn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 27
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 alias
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "normal"
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-22">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-23">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 id
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 normal
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-24">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-25">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.7224977951905031
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.1875348803463305
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-26">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.2953174992603351
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.26525647952450265
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-27">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.4536856090041318
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.7195024130068081
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-28">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.9970412477032209
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5181478766595276
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-29">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.19657711634539565
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.7316273979766378
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-30">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.48533720635534006
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.07724879367590629
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-31">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.7369825278894753
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.5462256961278941
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-32">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 7
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5241113627472694
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.2542275002421211
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-33">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 8
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.2977697066654349
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.5752237580095868
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9714715342261-34">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 9
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5060159582230856
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0900096472044518
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9714715342261-35">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0452 seconds] -->
        <p>
        </p>
       </blockquote>
       <h2>
        二、总结和描述性统计(Summary and descriptive statistics)
       </h2>
       <p>
        导入数据之后，我们要做的第一件事通常是先对数据有个大致的了解，看看它到底是不是我们想要的。对于数值列，了解其描述性统计信息有助于我们理解数据的分布。
        <code>
         describe
        </code>
        函数返回一个DataFrame，其中包含每个数值列的多项统计信息，比如非空条目的数量（count）、平均值（mean）、标准差（stddev）以及最小值（min）和最大值（max）。
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9723697606934" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [1]: from pyspark.sql.functions import rand, randn
In [2]: # A slightly different way to generate the two random columns
In [3]: df = sqlContext.range(0, 10).withColumn('uniform', rand(seed=10)).withColumn('normal', randn(seed=27))

In [4]: df.describe().show()
+-------+------------------+-------------------+--------------------+
|summary|                id|            uniform|              normal|
+-------+------------------+-------------------+--------------------+
|  count|                10|                 10|                  10|
|   mean|               4.5| 0.5215336029384192|-0.01309370117407197|
| stddev|2.8722813232690143|  0.229328162820653|  0.5756058014772729|
|    min|                 0|0.19657711634539565| -0.7195024130068081|
|    max|                 9| 0.9970412477032209|  1.0900096472044518|
+-------+------------------+-------------------+--------------------+
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9723697606934-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9723697606934-14">
                14
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9723697606934-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e">
                 import
                </span>
                <span class="crayon-v">
                 rand
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 randn
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-2">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # A slightly different way to generate the two random columns
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-3">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sqlContext
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 rand
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'normal'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 randn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 27
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-4">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-5">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 describe
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-6">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-7">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 summary
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 id
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 normal
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-8">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-9">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 count
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-10">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 mean
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 4.5
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5215336029384192
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.01309370117407197
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-11">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 stddev
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 2.8722813232690143
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.229328162820653
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5756058014772729
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-12">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 min
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.19657711634539565
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.7195024130068081
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9723697606934-13">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 max
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 9
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.9970412477032209
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0900096472044518
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9723697606934-14">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0273 seconds] -->
        <p>
        </p>
       </blockquote>
       <div class="syntaxhighlighter " id="highlighter_749751">
       </div>
       <p>
        如果DataFrame含有大量的列，你也可以只对其中的一部分列调用describe：
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a972c566011951" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [4]: df.describe('uniform', 'normal').show()
+-------+-------------------+--------------------+
|summary|            uniform|              normal|
+-------+-------------------+--------------------+
|  count|                 10|                  10|
|   mean| 0.5215336029384192|-0.01309370117407197|
| stddev|  0.229328162820653|  0.5756058014772729|
|    min|0.19657711634539565| -0.7195024130068081|
|    max| 0.9970412477032209|  1.0900096472044518|
+-------+-------------------+--------------------+
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a972c566011951-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a972c566011951-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a972c566011951-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a972c566011951-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a972c566011951-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a972c566011951-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a972c566011951-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a972c566011951-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a972c566011951-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a972c566011951-10">
                10
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a972c566011951-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 describe
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'normal'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a972c566011951-2">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a972c566011951-3">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 summary
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 normal
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a972c566011951-4">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a972c566011951-5">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 count
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a972c566011951-6">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 mean
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5215336029384192
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.01309370117407197
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a972c566011951-7">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 stddev
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.229328162820653
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.5756058014772729
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a972c566011951-8">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 min
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.19657711634539565
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-cn">
                 0.7195024130068081
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a972c566011951-9">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 max
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.9970412477032209
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0900096472044518
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a972c566011951-10">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0208 seconds] -->
        <p>
        </p>
       </blockquote>
       <p>
       </p>
       <p>
        当然，虽然describe很适合用于快速的探索性数据分析，你也可以在DataFrame上使用普通的select来控制要计算哪些描述性统计量，以及这些统计量作用于哪些列（原文：you can also control the list of descriptive statistics and the columns they apply to using the normal select on a DataFrame）：
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9733790525134" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [5]: from pyspark.sql.functions import mean, min, max
In [6]: df.select([mean('uniform'), min('uniform'), max('uniform')]).show()
+------------------+-------------------+------------------+
|      AVG(uniform)|       MIN(uniform)|      MAX(uniform)|
+------------------+-------------------+------------------+
|0.5215336029384192|0.19657711634539565|0.9970412477032209|
+------------------+-------------------+------------------+
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9733790525134-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9733790525134-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9733790525134-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9733790525134-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9733790525134-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9733790525134-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9733790525134-7">
                7
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9733790525134-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e">
                 import
                </span>
                <span class="crayon-v">
                 mean
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 min
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 max
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9733790525134-2">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 select
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-e">
                 mean
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 min
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 max
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9733790525134-3">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9733790525134-4">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 AVG
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 MIN
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 MAX
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9733790525134-5">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9733790525134-6">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.5215336029384192
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.19657711634539565
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.9970412477032209
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9733790525134-7">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0135 seconds] -->
        <p>
        </p>
       </blockquote>
       <div class="syntaxhighlighter " id="highlighter_561111">
       </div>
       <h2>
        三、样本协方差和相关性(Sample covariance and correlation)
       </h2>
       <p>
        协方差衡量的是两个变量相互之间的变化关系。正数意味着其中一个变量增加时，另一个变量也有增加的趋势；而负数意味着其中一个变量增加时，另一个变量有降低的趋势。DataFrame中两列的样本协方差可以按如下方式计算：
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a973a160132994" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [1]: from pyspark.sql.functions import rand
In [2]: df = sqlContext.range(0, 10).withColumn('rand1', rand(seed=10)).withColumn('rand2', rand(seed=27))

In [3]: df.stat.cov('rand1', 'rand2')
Out[3]: 0.009908130446217347

In [4]: df.stat.cov('id', 'id')
Out[4]: 9.166666666666666
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a973a160132994-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a973a160132994-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a973a160132994-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a973a160132994-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a973a160132994-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a973a160132994-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a973a160132994-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a973a160132994-8">
                8
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a973a160132994-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e">
                 import
                </span>
                <span class="crayon-e">
                 rand
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a973a160132994-2">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sqlContext
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'rand1'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 rand
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'rand2'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 rand
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 27
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a973a160132994-3">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a973a160132994-4">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 cov
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'rand1'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'rand2'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a973a160132994-5">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.009908130446217347
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a973a160132994-6">
               </div>
               <div class="crayon-line" id="crayon-57685db8a973a160132994-7">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 cov
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'id'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'id'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a973a160132994-8">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 9.166666666666666
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0147 seconds] -->
        <p>
        </p>
       </blockquote>
       <div class="syntaxhighlighter " id="highlighter_182177">
       </div>
       <p>
        正如你从上面看到的，两个随机生成的列之间的协方差接近零；而id列和它自己的协方差非常大。
       </p>
       <p>
        协方差的值9.17可能很难解释；而相关系数（correlation）是协方差的归一化度量，相对更容易理解，因为它为两个随机变量之间的统计相关性提供了定量的度量。
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9742010162546" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           In [5]: df.stat.corr('rand1', 'rand2')
Out[5]: 0.14938694513735398

In [6]: df.stat.corr('id', 'id')
Out[6]: 1.0
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9742010162546-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9742010162546-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9742010162546-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9742010162546-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9742010162546-5">
                5
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9742010162546-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 corr
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'rand1'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'rand2'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9742010162546-2">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.14938694513735398
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9742010162546-3">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9742010162546-4">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 corr
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'id'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'id'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9742010162546-5">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0142 seconds] -->
        <p>
        </p>
       </blockquote>
       <p>
        在上面的例子中，id列与其自身完全相关；而两个随机生成的列之间的相关性非常低。
       </p>
       <h2>
        四、交叉分类汇总表（又称列联表）(Cross tabulation)
       </h2>
       <p>
        如果同时按几个变量或特征对数据进行分类并列成表格，这样的统计表叫作交叉分类汇总表，其主要用来检验两个变量之间是否存在关系，或者说是否相互独立。在Spark 1.4中，我们可以计算DataFrame中两列之间的交叉分类汇总表，从而得到这两列中每一种不同取值组合出现的次数。下面是关于如何使用交叉表来获取列联表的例子：
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9749597295300" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          In [1]: # Create a DataFrame with two columns (name, item)
In [2]: names = ["Alice", "Bob", "Mike"]
In [3]: items = ["milk", "bread", "butter", "apples", "oranges"]
In [4]: df = sqlContext.createDataFrame([(names[i % 3], items[i % 5]) for i in range(100)], ["name", "item"])

In [5]: # Take a look at the first 10 rows.
In [6]: df.show(10)
+-----+-------+
| name|   item|
+-----+-------+
|Alice|   milk|
|  Bob|  bread|
| Mike| butter|
|Alice| apples|
|  Bob|oranges|
| Mike|   milk|
|Alice|  bread|
|  Bob| butter|
| Mike| apples|
|Alice|oranges|
+-----+-------+

In [7]: df.stat.crosstab("name", "item").show()
+---------+----+-----+------+------+-------+
|name_item|milk|bread|apples|butter|oranges|
+---------+----+-----+------+------+-------+
|      Bob|   6|    7|     7|     6|      7|
|     Mike|   7|    6|     7|     7|      6|
|    Alice|   7|    7|     6|     7|      7|
+---------+----+-----+------+------+-------+
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-5">
               5
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-6">
               6
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-7">
               7
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-8">
               8
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-9">
               9
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-10">
               10
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-11">
               11
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-12">
               12
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-13">
               13
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-14">
               14
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-15">
               15
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-16">
               16
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-17">
               17
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-18">
               18
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-19">
               19
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-20">
               20
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-21">
               21
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-22">
               22
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-23">
               23
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-24">
               24
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-25">
               25
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-26">
               26
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-27">
               27
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-28">
               28
              </div>
              <div class="crayon-num" data-line="crayon-57685db8a9749597295300-29">
               29
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9749597295300-30">
               30
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685db8a9749597295300-1">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-p">
                # Create a DataFrame with two columns (name, item)
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-2">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                2
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                names
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-s">
                "Alice"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "Bob"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "Mike"
               </span>
               <span class="crayon-sy">
                ]
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-3">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                3
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                items
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-s">
                "milk"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "bread"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "butter"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "apples"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "oranges"
               </span>
               <span class="crayon-sy">
                ]
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-4">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                4
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                df
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sqlContext
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                createDataFrame
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                names
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-v">
                i
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                %
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                3
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                items
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-v">
                i
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                %
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                5
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-st">
                for
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-i">
                i
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-st">
                in
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                range
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                100
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-s">
                "name"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "item"
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-5">
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-6">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                5
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-p">
                # Take a look at the first 10 rows.
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-7">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                df
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                show
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                10
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-8">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-9">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                name
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                item
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-10">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-11">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                Alice
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                milk
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-12">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Bob
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                bread
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-13">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Mike
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                butter
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-14">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                Alice
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                apples
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-15">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Bob
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                oranges
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-16">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Mike
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                milk
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-17">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                Alice
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                bread
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-18">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Bob
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                butter
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-19">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Mike
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                apples
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-20">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                Alice
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                oranges
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-21">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-22">
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-23">
               <span class="crayon-st">
                In
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                df
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                stat
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                crosstab
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-s">
                "name"
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-s">
                "item"
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                show
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-24">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-25">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                name_item
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                milk
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                bread
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                apples
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                butter
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                oranges
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-26">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-27">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Bob
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-28">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Mike
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db8a9749597295300-29">
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Alice
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                6
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                7
               </span>
               <span class="crayon-o">
                |
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9749597295300-30">
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                --
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-o">
                +
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0533 seconds] -->
       <p>
       </p>
       <div class="syntaxhighlighter " id="highlighter_395005">
       </div>
       <p>
        我们需要记住，列的基数不能太大。也就是说，name和item去重（distinct）之后的取值数量不能过多。试想，如果item去重之后有10亿个不同的取值，那么你如何在屏幕上显示这个表？
       </p>
       <h2>
        五、频繁项(Frequent items)
       </h2>
       <p>
        了解列中哪些item频繁出现，对于我们理解数据集非常重要。在Spark 1.4中，我们可以使用DataFrames来发现列中的频繁项：
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9752678746583" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           In [1]: df = sqlContext.createDataFrame([(1, 2, 3) if i % 2 == 0 else (i, 2 * i, i % 4) for i in range(100)], ["a", "b", "c"])

In [2]: df.show(10)
+-+--+-+
|a| b|c|
+-+--+-+
|1| 2|3|
|1| 2|1|
|1| 2|3|
|3| 6|3|
|1| 2|3|
|5|10|1|
|1| 2|3|
|7|14|3|
|1| 2|3|
|9|18|1|
+-+--+-+

In [3]: freq = df.stat.freqItems(["a", "b", "c"], 0.4)
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9752678746583-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9752678746583-19">
                19
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9752678746583-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sqlContext
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 createDataFrame
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 i
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 %
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 ==
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 else
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 i
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 *
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 i
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 i
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 %
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-i">
                 i
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 100
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-s">
                 "a"
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 "b"
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 "c"
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-2">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-3">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-4">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-5">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 a
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 b
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-v">
                 c
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-6">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-7">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-8">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-9">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-10">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-11">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-12">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-13">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-14">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 7
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 14
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-15">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-16">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 9
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 18
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-17">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9752678746583-18">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9752678746583-19">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 freq
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 freqItems
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-s">
                 "a"
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 "b"
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 "c"
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.4
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0167 seconds] -->
        <p>
        </p>
       </blockquote>
       <p>
        对于上面的DataFrame，下面的代码可以找出每列中出现频率达到40%的频繁项：
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9759651321760" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [4]: freq.collect()[0]
Out[4]: Row(a_freqItems=[11, 1], b_freqItems=[2, 22], c_freqItems=[1, 3])
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9759651321760-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9759651321760-2">
                2
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9759651321760-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 freq
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 collect
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9759651321760-2">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 Row
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 a_freqItems
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 11
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 b_freqItems
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 22
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 c_freqItems
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0081 seconds] -->
        <p>
        </p>
       </blockquote>
       <div class="syntaxhighlighter " id="highlighter_410370">
       </div>
       <p>
        正如你所看到的，11和1是列a的频繁项。同样，你也可以获取列组合的频繁项：只需通过struct函数创建一个列组合即可。
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9760961024134" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [5]: from pyspark.sql.functions import struct

In [6]: freq = df.withColumn('ab', struct('a', 'b')).stat.freqItems(['ab'], 0.4)

In [7]: freq.collect()[0]
Out[7]: Row(ab_freqItems=[Row(a=11, b=22), Row(a=1, b=2)])
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9760961024134-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9760961024134-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9760961024134-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9760961024134-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9760961024134-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9760961024134-6">
                6
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9760961024134-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 5
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e">
                 import
                </span>
                <span class="crayon-t">
                 struct
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9760961024134-2">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9760961024134-3">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 6
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 freq
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'ab'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 struct
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'a'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'b'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stat
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 freqItems
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-s">
                 'ab'
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.4
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9760961024134-4">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9760961024134-5">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 7
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 freq
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 collect
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9760961024134-6">
                <span class="crayon-v">
                 Out
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 7
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 Row
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 ab_freqItems
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-e">
                 Row
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 a
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 11
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 b
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 22
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 Row
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 a
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 b
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0093 seconds] -->
        <p>
        </p>
       </blockquote>
       <div class="syntaxhighlighter " id="highlighter_484626">
       </div>
       <p>
        对于上面的例子来说，“a=11 and b=22”和“a=1 and b=2”的组合频繁出现在数据集中。注意，“a=11 and b=22”是一个假阳性（false positive）结果。
       </p>
       <h2>
        六、数学函数(Mathematical functions)
       </h2>
       <p>
        Spark 1.4中增加了一系列的数学函数，用户可以自如地将这些函数应用到他们的列上。你可以在
        <a data-original-title="" href="https://github.com/apache/spark/blob/efe3bfdf496aa6206ace2697e31dd4c0c3c824fb/python/pyspark/sql/functions.py#L109" target="_blank" title="">
         这里
        </a>
        看到所有受支持的数学函数。对于只接受一个参数的函数（比如cos、sin、floor、ceil），输入必须是列；对于需要两个参数的函数（比如pow、hypot），可以传入两列，也可以传入一个数值与一列的组合。
       </p>
       <blockquote>
        <p>
        </p>
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db8a9768404271662" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
In [1]: from pyspark.sql.functions import *
In [2]: df = sqlContext.range(0, 10).withColumn('uniform', rand(seed=10) * 3.14)

In [3]: # you can reference a column or supply the column name
In [4]: df.select(
   ...:   'uniform',
   ...:   toDegrees('uniform'),
   ...:   (pow(cos(df['uniform']), 2) + pow(sin(df.uniform), 2)). \
   ...:     alias("cos^2 + sin^2")).show()

+--------------------+------------------+------------------+
|             uniform|  DEGREES(uniform)|     cos^2 + sin^2|
+--------------------+------------------+------------------+
|  0.7224977951905031| 41.39607437192317|               1.0|
|  0.3312021111290707|18.976483133518624|0.9999999999999999|
|  0.2953174992603351|16.920446323975014|               1.0|
|0.018326130186194667| 1.050009914476252|0.9999999999999999|
|  0.3163135293051941|18.123430232075304|               1.0|
|  0.4536856090041318| 25.99427062175921|               1.0|
|   0.873869321369476| 50.06902396043238|0.9999999999999999|
|  0.9970412477032209| 57.12625549385224|               1.0|
| 0.19657711634539565| 11.26303911544332|1.0000000000000002|
|  0.9632338825504894| 55.18923615414307|               1.0|
+--------------------+------------------+------------------+
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-21">
                21
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-22">
                22
               </div>
               <div class="crayon-num" data-line="crayon-57685db8a9768404271662-23">
                23
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db8a9768404271662-24">
                24
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db8a9768404271662-1">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 from
                </span>
                <span class="crayon-v">
                 pyspark
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 sql
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 functions
                </span>
                <span class="crayon-e ">
                 import *
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-2">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sqlContext
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 withColumn
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 rand
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 seed
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-cn">
                 10
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 *
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3.14
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-3">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-4">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # you can reference a column or supply the column name
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-5">
                <span class="crayon-st">
                 In
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 4
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 select
                </span>
                <span class="crayon-sy">
                 (
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-6">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-7">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 toDegrees
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-8">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-e">
                 pow
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-e">
                 cos
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-s">
                 'uniform'
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 pow
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-e">
                 sin
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 df
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 \
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-9">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 alias
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "cos^2 + sin^2"
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 show
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-10">
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-11">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-12">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 DEGREES
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 uniform
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 cos
                </span>
                <span class="crayon-o">
                 ^
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 sin
                </span>
                <span class="crayon-o">
                 ^
                </span>
                <span class="crayon-cn">
                 2
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-13">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-14">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.7224977951905031
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 41.39607437192317
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-15">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.3312021111290707
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 18.976483133518624
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.9999999999999999
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-16">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.2953174992603351
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 16.920446323975014
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-17">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.018326130186194667
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.050009914476252
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.9999999999999999
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-18">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.3163135293051941
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 18.123430232075304
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-19">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.4536856090041318
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 25.99427062175921
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-20">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.873869321369476
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 50.06902396043238
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 0.9999999999999999
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-21">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.9970412477032209
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 57.12625549385224
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-22">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.19657711634539565
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 11.26303911544332
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-cn">
                 1.0000000000000002
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db8a9768404271662-23">
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.9632338825504894
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 55.18923615414307
                </span>
                <span class="crayon-o">
                 |
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1.0
                </span>
                <span class="crayon-o">
                 |
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db8a9768404271662-24">
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 --
                </span>
                <span class="crayon-o">
                 +
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0258 seconds] -->
        <p>
        </p>
       </blockquote>
       <p>
        本文提到的所有函数将在Spark 1.4中可用，并且支持Python、Scala和Java调用。Spark 1.4将在近期发布。如果你等不及了，你可以到
        <a data-original-title="https://github.com/apache/spark/tree/branch-1.4" href="https://github.com/apache/spark/tree/branch-1.4" target="_blank" title="">
         https://github.com/apache/spark/tree/branch-1.4
        </a>
        里面下载。
       </p>
      </div>
      <div>
       <strong>
        注：转载文章均来自于公开网络，仅供学习使用，不会用于任何商业用途，如果侵犯到原作者的权益，请您与我们联系删除或者授权事宜，联系邮箱：contact@dataunion.org。转载数盟网站文章请注明原文章作者，否则产生的任何版权纠纷与数盟无关。
       </strong>
      </div>
      <!--content_text-->
      <div class="fenxian">
       <!-- Share-button strip. The jiathis_button_* class suffixes name the target services
            (weixin, tsina, qzone, cqq, tumblr); every placeholder below is empty in the markup.
            NOTE(review): presumably these are filled in at runtime by the JiaThis script, which
            is not visible in this part of the file. Confirm it is still loaded. -->
       <!-- JiaThis Button BEGIN -->
       <div class="jiathis_style_32x32">
        <p class="jiathis_button_weixin">
        </p>
        <p class="jiathis_button_tsina">
        </p>
        <p class="jiathis_button_qzone">
        </p>
        <p class="jiathis_button_cqq">
        </p>
        <p class="jiathis_button_tumblr">
        </p>
        <a class="jiathis jiathis_txt jtico jtico_jiathis" href="http://www.jiathis.com/share" target="_blank">
        </a>
        <p class="jiathis_counter_style">
        </p>
       </div>
       <!-- JiaThis Button END -->
      </div>
     </article>
     <!--content-->
     <!--相关文章-->
     <div class="xianguan">
      <!-- Related posts. Each entry has two links to the same post: a thumbnail link and a
           title link. The thumbnail is the only content of its link, so its alt text (the
           post title) is what gives that link an accessible name. -->
      <div class="xianguantitle">
       相关文章！
      </div>
      <ul class="pic">
       <li>
        <a href="http://dataunion.org/20824.html">
         <img alt="如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘" src="http://dataunion.org/wp-content/uploads/2015/09/t018630756a7e263b33-300x165.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20824.html" rel="bookmark" title="如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘">
         如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20820.html">
         <img alt="人们对Python在企业级开发中的10大误解" src="http://dataunion.org/wp-content/uploads/2015/09/1-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20820.html" rel="bookmark" title="人们对Python在企业级开发中的10大误解">
         人们对Python在企业级开发中的10大误解
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20811.html">
         <img alt="大神亲传：26条深度学习的金科玉律！" src="http://dataunion.org/wp-content/uploads/2015/09/t0133fcacae8523307b_副本-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20811.html" rel="bookmark" title="大神亲传：26条深度学习的金科玉律！">
         大神亲传：26条深度学习的金科玉律！
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20808.html">
         <img alt="我们是如何在一张地图上表现86万个数据的" src="http://dataunion.org/wp-content/uploads/2015/09/640.webp-11-300x137.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20808.html" rel="bookmark" title="我们是如何在一张地图上表现86万个数据的">
         我们是如何在一张地图上表现86万个数据的
        </a>
       </li>
      </ul>
     </div>
     <!--相关文章-->
     <div class="comment" id="comments">
      <!-- You can start editing here. -->
      <!-- If comments are open, but there are no comments. -->
      <div class="title">
       期待你一针见血的评论，Come on！
      </div>
      <div id="respond">
       <p>
        不用想啦，马上
        <a href="http://dataunion.org/wp-login.php?redirect_to=http%3A%2F%2Fdataunion.org%2F19375.html">
         "登录"
        </a>
        发表自己的想法。
       </p>
      </div>
     </div>
     <!-- .nav-single -->
    </div>
    <!--Container End-->
    <aside id="sitebar">
     <div class="sitebar_list2">
      <!-- Sidebar box headed "热门标签+" (popular tags). Its body is a four-item menu of links
           to other sites. -->
      <div class="wptag">
       <span class="tagtitle">
        热门标签+
       </span>
       <div class="tagg">
        <!-- NOTE(review): the menu id percent-decodes to "menu-友情链接" (friendly links),
             which does not match the "popular tags" heading above. Confirm which label is
             intended. -->
        <ul class="menu" id="menu-%e5%8f%8b%e6%83%85%e9%93%be%e6%8e%a5">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-1605" id="menu-item-1605">
          <a href="http://taidizh.com/">
           泰迪智慧
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20884" id="menu-item-20884">
          <a href="http://www.transwarp.cn/">
           星环科技
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-3538" id="menu-item-3538">
          <a href="http://datall.org/">
           珈和遥感
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20888" id="menu-item-20888">
          <a href="http://www.chinahadoop.cn/">
           小象学院
          </a>
         </li>
        </ul>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <!-- Sidebar banner: an image-only link that opens a study.163.com course page in a new tab.
           rel="noopener" keeps the opened page from reaching back through window.opener.
           NOTE(review): the alt text is derived from the link target, not from the image
           itself. Confirm the wording against the actual banner. -->
      <div class="textwidget">
       <div align="center">
        <a href="http://study.163.com/course/courseMain.htm?courseId=991022" rel="noopener" target="_blank">
         <img alt="网易云课堂课程" src="http://dataunion.org/wp-content/uploads/2016/03/dv.jpg"/>
        </a>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <!-- Category cloud headed "文章分类" (article categories). Each link's title attribute
           carries the category's topic count, and the inline font-size grows with that count:
           8pt for 1 topic up to 22pt for 693 topics. -->
      <h4 class="sitebar_title">
       文章分类
      </h4>
      <div class="tagcloud">
       <a class="tag-link-44" href="http://dataunion.org/category/industry/demo" style="font-size: 10.204724409449pt;" title="4个话题">
        Demo展示
       </a>
       <a class="tag-link-31" href="http://dataunion.org/category/experts" style="font-size: 15.826771653543pt;" title="52个话题">
        专家团队
       </a>
       <a class="tag-link-870" href="http://dataunion.org/category/tech/ai" style="font-size: 19.795275590551pt;" title="273个话题">
        人工智能
       </a>
       <a class="tag-link-488" href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f" style="font-size: 8pt;" title="1个话题">
        加入数盟
       </a>
       <a class="tag-link-869" href="http://dataunion.org/category/tech/viz" style="font-size: 17.204724409449pt;" title="93个话题">
        可视化
       </a>
       <a class="tag-link-30" href="http://dataunion.org/category/partners" style="font-size: 10.645669291339pt;" title="5个话题">
        合作伙伴
       </a>
       <a class="tag-link-889" href="http://dataunion.org/category/parterc" style="font-size: 11.582677165354pt;" title="8个话题">
        合作会议
       </a>
       <a class="tag-link-104" href="http://dataunion.org/category/books" style="font-size: 12.96062992126pt;" title="15个话题">
        图书
       </a>
       <a class="tag-link-220" href="http://dataunion.org/category/tech/base" style="font-size: 19.850393700787pt;" title="281个话题">
        基础架构
       </a>
       <a class="tag-link-219" href="http://dataunion.org/category/tech/analysis" style="font-size: 19.409448818898pt;" title="232个话题">
        数据分析
       </a>
       <a class="tag-link-887" href="http://dataunion.org/category/tech/dm" style="font-size: 13.291338582677pt;" title="17个话题">
        数据挖掘
       </a>
       <a class="tag-link-34" href="http://dataunion.org/category/tech" style="font-size: 20.732283464567pt;" title="404个话题">
        文章
       </a>
       <a class="tag-link-1" href="http://dataunion.org/category/uncategorized" style="font-size: 22pt;" title="693个话题">
        未分类
       </a>
       <a class="tag-link-4" href="http://dataunion.org/category/events" style="font-size: 14.503937007874pt;" title="29个话题">
        活动
       </a>
       <a class="tag-link-890" href="http://dataunion.org/category/tech/%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0" style="font-size: 10.204724409449pt;" title="4个话题">
        深度学习
       </a>
       <a class="tag-link-221" href="http://dataunion.org/category/tech/devl" style="font-size: 18.968503937008pt;" title="193个话题">
        编程语言
       </a>
       <a class="tag-link-888" href="http://dataunion.org/category/career" style="font-size: 15.661417322835pt;" title="48个话题">
        职业规划
       </a>
       <a class="tag-link-5" href="http://dataunion.org/category/jobs" style="font-size: 14.11811023622pt;" title="25个话题">
        职位
       </a>
       <a class="tag-link-871" href="http://dataunion.org/category/industry" style="font-size: 15.716535433071pt;" title="49个话题">
        行业
       </a>
       <a class="tag-link-613" href="http://dataunion.org/category/industry/case" style="font-size: 16.984251968504pt;" title="84个话题">
        行业应用
       </a>
       <a class="tag-link-885" href="http://dataunion.org/category/industry/news" style="font-size: 17.425196850394pt;" title="102个话题">
        行业资讯
       </a>
       <a class="tag-link-10" href="http://dataunion.org/category/training" style="font-size: 14.228346456693pt;" title="26个话题">
        课程
       </a>
       <a class="tag-link-16" href="http://dataunion.org/category/sources" style="font-size: 15.661417322835pt;" title="48个话题">
        资源
       </a>
      </div>
     </div>
     <div class="sitebar_list">
      <!-- Box headed "功能" (functions): links to the registration and login pages, the post
           and comment RSS feeds, and WordPress.org. -->
      <h4 class="sitebar_title">
       功能
      </h4>
      <ul>
       <li>
        <a href="http://dataunion.org/wp-login.php?action=register">
         注册
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/wp-login.php">
         登录
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/feed">
         文章
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/comments/feed">
         评论
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="https://cn.wordpress.org/" title="基于WordPress，一个优美、先进的个人信息发布平台。">
         WordPress.org
        </a>
       </li>
      </ul>
     </div>
    </aside>
    <div class="clear">
    </div>
   </div>
   <!--main-->
   <footer id="dibu">
    <div class="about">
     <div class="right">
      <ul class="menu" id="menu-%e5%ba%95%e9%83%a8%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-18024" id="menu-item-18024">
        <a href="http://dataunion.org/category/partners">
         合作伙伴
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20881" id="menu-item-20881">
        <a href="http://dataunion.org/contribute">
         文章投稿
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20872" id="menu-item-20872">
        <a href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f">
         加入数盟
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22441" id="menu-item-22441">
        <a href="http://dataunion.org/f-links">
         友情链接
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20874" id="menu-item-20874">
        <a href="http://dataunion.org/aboutus">
         关于数盟
        </a>
       </li>
      </ul>
      <p class="banquan">
       数盟社区，做最棒的数据科学社区
      </p>
     </div>
     <div class="left">
      <!-- Footer social links: the Weibo profile (opens in a new tab) and a trigger link for
           the WeChat QR-code popup defined right below. Both links contain only an icon, so
           the alt text is what names them. -->
      <ul class="bottomlist">
       <li>
        <!-- The original tag had a full-width space (U+3000) in front of an empty title
             attribute, which is not attribute whitespace in HTML, plus trailing spaces in
             the href. Both are removed here. -->
        <a href="http://weibo.com/DataScientistUnion" rel="noopener" target="_blank">
         <img alt="新浪微博" src="http://dataunion.org/wp-content/themes/yzipi/images/weibo.png"/>
        </a>
       </li>
       <li>
        <a class="cd-popup-trigger" href="http://dataunion.org/19375.html#0">
         <img alt="微信公众号" src="http://dataunion.org/wp-content/themes/yzipi/images/weixin.png"/>
        </a>
       </li>
      </ul>
      <!-- NOTE(review): presumably shown and hidden by script through the cd-popup-trigger and
           cd-popup-close classes; that script is not visible in this part of the file. -->
      <div class="cd-popup">
       <div class="cd-popup-container">
        <h1>
         扫描二维码,加微信公众号
        </h1>
        <img alt="微信公众号二维码" src="http://dataunion.org/wp-content/themes/yzipi/images/2014-12-06-1515289049.png"/>
        <!-- The close link has no text content, so aria-label supplies its name. -->
        <a aria-label="关闭" class="cd-popup-close" href="http://dataunion.org/19375.html">
        </a>
       </div>
       <!-- cd-popup-container -->
      </div>
      <!-- cd-popup -->
     </div>
    </div>
    <!--about-->
    <div class="bottom">
     <!-- Footer bar: site home link, ICP registration link, and contact e-mail. -->
     <a href="http://dataunion.org/">
      数盟社区
     </a>
     <!-- Opens in a new tab; noopener keeps the opened page from reaching window.opener. -->
     <a href="http://www.miitbeian.gov.cn/" rel="external nofollow noopener" target="_blank">
      京ICP备14026740号
     </a>
     联系我们：
     <!-- No target here: a mailto link hands off to the mail client, and target="_blank"
          can leave an empty tab behind in some browsers. -->
     <a href="mailto:contact@dataunion.org">
      contact@dataunion.org
     </a>
     <div class="tongji">
     </div>
     <!--bottom-->
     <!-- Hidden by default through the inline style.
          NOTE(review): presumably a back-to-top control revealed by script. Confirm. -->
     <div class="scroll" id="scroll" style="display:none;">
      ︿
     </div>
    </div>
   </footer>
   <!--dibu-->
  </div>
 </body>
</html>